import jieba
import jieba.analyse
import jieba.posseg as pseg
import codecs,sys

def cut_words(sentence):
    """Segment *sentence* with jieba and return the result as UTF-8 bytes.

    Args:
        sentence: The text to segment; it is passed straight to
            ``jieba.cut``.

    Returns:
        The tokens yielded by ``jieba.cut`` joined with single spaces and
        encoded as UTF-8.
    """
    # Join with a space, the same separator the line-processing loop in this
    # file uses. An empty separator would glue the tokens back together and
    # throw the segmentation away.
    return ' '.join(jieba.cut(sentence)).encode('utf-8')
# Segment every line of 'wiki.all.jian.txt' with jieba and write the tokens,
# separated by single spaces, to 'wiki.all.jian.seg.txt'.
#
# Both files are managed by a `with` statement so they are closed even if
# segmentation or a write raises part-way through the corpus.
# codecs.open is kept on purpose so decoding/newline handling stays exactly
# as before.
with codecs.open('wiki.all.jian.txt', 'r', encoding='utf-8') as f, \
        codecs.open('wiki.all.jian.seg.txt', 'w', encoding='utf-8') as target:
    print('open files')

    # Lines are counted from 1; the input is streamed one line at a time so
    # the whole corpus is never held in memory.
    for lin_num, line in enumerate(f, 1):
        # Progress report on every 1000th line, emitted before that line is
        # processed.
        if lin_num % 1000 == 0:
            print('-----------processing',lin_num,'article------')
        # No newline is appended here: the output relies on the line
        # terminator read from the input surviving segmentation
        # (presumably jieba.cut yields it as a token -- confirm).
        target.write(' '.join(jieba.cut(line)))
sys.exit()